#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re

# Read a subtitle (.srt) file into structured data:
# [(index, startTime, endTime, text), ...]
def readSrt(filePath, encoding=None):
    """Parse an SRT subtitle file into a list of cue tuples.

    Args:
        filePath: Path of the subtitle file to read.
        encoding: Text encoding handed to ``open``. ``None`` (the default)
            keeps the platform's default encoding, as before.

    Returns:
        A list of ``(index, startTime, endTime, text)`` tuples. ``index`` is
        the cue's first line as a string, ``startTime`` and ``endTime`` are
        the two sides of the ``-->`` arrow converted by
        ``dateTimeStrToStamp``, and ``text`` is the cue text; a cue spanning
        several lines keeps its line breaks.

    Blocks with fewer than three lines, or whose second line does not hold
    exactly one ``-->`` arrow, are skipped.
    """
    with open(filePath, 'r', encoding=encoding) as f:
        totalText = f.read()
    # A byte-order mark would otherwise end up inside the first cue's index.
    # Stripping the outer whitespace keeps a trailing newline at end of file
    # from adding an empty line to (and thereby invalidating) the last cue.
    totalText = totalText.lstrip("\ufeff").strip()
    res = []
    # Cues are separated by one or more blank lines.
    for block in re.split(r"\n\s*\n", totalText):
        lines = block.split("\n")
        # Need at least: index line, timing line, one line of text.
        if len(lines) < 3:
            continue
        # Timing line looks like: 00: 00:01, 130 --> 00: 00:03, 570
        times = lines[1].split("-->")
        if len(times) != 2:
            continue
        item = (
            lines[0],
            dateTimeStrToStamp(times[0]),
            dateTimeStrToStamp(times[1]),
            "\n".join(lines[2:]),
        )
        res.append(item)
    return res


# Convert a timestamp such as "00: 00:01, 130" to milliseconds.
def dateTimeStrToStamp(dateTimeStr):
    """Convert an ``HH:MM:SS,mmm`` timestamp string to milliseconds.

    The fields may be surrounded by whitespace, and the millisecond field
    may be separated by either ``,`` or ``.``. Fields after the fourth are
    ignored.

    Args:
        dateTimeStr: Timestamp text, e.g. ``"00: 00:01, 130"``.

    Returns:
        The total number of milliseconds as an int.

    Raises:
        ValueError: If one of the four fields is not an integer.
        IndexError: If the string has fewer than four fields.
    """
    arr = re.split(r'[:,.]', dateTimeStr)
    hour = int(arr[0].strip())
    minute = int(arr[1].strip())
    second = int(arr[2].strip())
    milliSecond = int(arr[3].strip())
    # Fold hours -> minutes -> seconds -> milliseconds.
    return ((hour * 60 + minute) * 60 + second) * 1000 + milliSecond
